## Acquire demographic data on tennis players
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(stringr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Download the ATP player table and switch its lower-case column names to
# Title_Case ones. Columns not listed here keep their original names, and
# rename() leaves the column order untouched.
demographics <- rename(
  read.csv("https://raw.githubusercontent.com/JeffSackmann/tennis_atp/master/atp_players.csv"),
  Player_Id = player_id,
  First_Name = name_first,
  Last_Name = name_last,
  DOB = dob,
  Country = ioc,
  Height = height,
  Wikidata_Id = wikidata_id
)
# Share of players per `Country` value, drawn as a bar chart.
# `Prop` is each country's fraction of all rows in `demographics`;
# `Percentage` is the same value on a 0-100 scale, rounded to two decimals.
country_plot <- demographics %>%
  count(Country) %>%
  mutate(
    Prop = n / sum(n),
    Percentage = round(Prop * 100, 2)
  ) %>%
  ggplot(aes(x = Country, y = Percentage, fill = Country)) +
  # geom_col() is the idiomatic spelling of geom_bar(stat = "identity").
  geom_col() +
  coord_cartesian(clip = "off") +
  # `Percentage` is already on a 0-100 scale, so scale = 1 only appends "%"
  # to the tick labels instead of multiplying the values by 100 again.
  scale_y_continuous(labels = label_percent(scale = 1)) +
  # Per-country tick labels and both axis titles are hidden, so no x/y
  # labels are set: they would never be displayed.
  theme(
    axis.text.x = element_blank(),
    axis.title = element_blank()
  ) +
  labs(title = "Country breakdown of Players via Country affiliation")

# Convert the static ggplot into an interactive plotly widget and display it.
ggplotly(country_plot)
# Note: the Height column is measured in centimeters (cm).